VSURF

Author

Miguel Fudolig

library(tidyverse)
library(ggplot2)
library(lavaan)
library(car)
library(glmnet)
library(randomForestSRC)
library(caret)
library(ggRandomForests)
library(VSURF)

Data set

This data set is from the 2015 Asian American Quality of Life survey. Participants are from Austin, Texas.

Input data set

# Read the survey export and prepare the factors used in the analyses below.
qol <- read_csv("AAQoL.csv") |>
  # Every character column becomes a factor.
  mutate(across(where(is.character), as.factor)) |>
  # Set the baseline (reference) level of the factors that are releveled here.
  mutate(
    `English Difficulties` = relevel(`English Difficulties`, ref = "Not at all"),
    `English Speaking` = relevel(`English Speaking`, ref = "Not at all"),
    Ethnicity = relevel(Ethnicity, ref = "Chinese"),
    Religion = relevel(Religion, ref = "None")
  ) |>
  # Collapse the income brackets into two groups split at $60,000.
  mutate(
    Income_median = case_match(
      Income,
      c(
        "$0 - $9,999",
        "$10,000 - $19,999",
        "$20,000 - $29,999",
        "$30,000 - $39,999",
        "$40,000 - $49,999",
        "$50,000 - $59,999"
      ) ~ "Below",
      c(
        "$60,000 - $69,999",
        "$70,000 and over"
      ) ~ "Above",
      .default = Income
    )
  ) |>
  # Any value other than "Below"/"Above" becomes NA in this conversion.
  mutate(Income_median = factor(Income_median, levels = c("Below", "Above"))) |>
  # The agreement items in these two column ranges use "Strongly disagree"
  # as their reference level.
  mutate(
    across(
      c(
        `Family Respect`:`Togetherness`,
        `Close-knit Community`:`Community Trust`
      ),
      \(item) relevel(item, ref = "Strongly disagree")
    )
  )
New names:
Rows: 2609 Columns: 231
── Column specification
──────────────────────────────────────────────────────── Delimiter: "," chr
(190): Gender, Ethnicity, Marital Status, No One, Spouse, Children, Gran... dbl
(41): Survey ID, Age, Education Completed, Household Size, Grandparent,...
ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `Other` -> `Other...17`
• `Other` -> `Other...89`
# Interactive view of the full prepared data set.
DT::datatable(qol)
Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html

Family

# Modelling frame for the `Family` outcome: keep only "No"/"Yes" responses,
# drop the factor levels that are no longer used, select the candidate
# predictors and remove incomplete rows.
rfdata <- qol |>
  filter(Family %in% c("No", "Yes")) |>
  mutate(Family = droplevels(Family)) |>
  select(
    Family, Ethnicity, Age, Gender, Religion, `Full Time Employment`,
    Income_median, `English Speaking`, `English Difficulties`,
    `See Family`:`Community Trust`, `Health Insurance`, `Dental Insurance`,
    `Discrimination`
  ) |>
  na.omit() |>
  as.data.frame() |>
  # Syntactic column names so the variables can be used in formulas.
  rename_with(make.names)

# Draw a ROSE sample from the modelling frame (fixed seed) and keep its data.
imbal <- ROSE::ROSE(Family ~ ., data = rfdata, seed = 3)$data

# Variable selection with VSURF on the ROSE sample.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, and left-assignment keeps the target name visible first.
vsurf.mod <- VSURF(
  Family ~ .,
  imbal,
  na.action = "na.omit",
  parallel = TRUE,
  verbose = FALSE
)
Warning in VSURF.formula(Family ~ ., imbal, na.action = "na.omit", parallel = T, : VSURF with a formula-type call outputs selected variables
which are indices of the input matrix based on the formula:
you may reorder these to get indices of the original data
# Timing and number of variables kept at each VSURF step.
summary(vsurf.mod)

 VSURF computation time: 1.1 mins 

 VSURF selected: 
    34 variables at thresholding step (in 8.5 secs)
    33 variables at interpretation step (in 7.4 secs)
    16 variables at prediction step (in 47.1 secs)

 VSURF ran in parallel on a PSOCK cluster and used 15 cores 
# Names of the variables kept at the prediction step. The indices refer to the
# predictors only, so they are looked up after dropping the outcome column.
colnames(rfdata[, -1])[vsurf.mod[["varselect.pred"]]]
 [1] "Ethnicity"            "Age"                  "English.Difficulties"
 [4] "Full.Time.Employment" "Helpful.Community"    "Religion"            
 [7] "Get.Along"            "Expression"           "Religious.Importance"
[10] "English.Speaking"     "Community.Trust"      "Religious.Attendance"
[13] "Family.Respect"       "Close.knit.Community" "Successful.Family"   
[16] "Loyalty"             
# Names of the variables kept at the interpretation step (outcome column
# dropped before indexing).
colnames(rfdata[, -1])[vsurf.mod[["varselect.interp"]]]
 [1] "Ethnicity"               "Age"                    
 [3] "English.Difficulties"    "Full.Time.Employment"   
 [5] "Helpful.Community"       "Religion"               
 [7] "See.Friends"             "Discrimination"         
 [9] "Get.Along"               "Helpful.Family"         
[11] "Expression"              "Religious.Importance"   
[13] "Helpful.Friends"         "English.Speaking"       
[15] "Community.Trust"         "See.Family"             
[17] "Religious.Attendance"    "Close.Family"           
[19] "Community.Shares.Values" "Family.Respect"         
[21] "Close.knit.Community"    "Successful.Family"      
[23] "Feel.Close"              "Close.Friends"          
[25] "Similar.Values"          "Loyalty"                
[27] "Spend.Time.Together"     "Trust"                  
[29] "Togetherness"            "Family.Pride"           
[31] "Gender"                  "Dental.Insurance"       
[33] "Income_median"          
# Diagnostic plots for the VSURF run.
vsurf.mod |> plot()

# NOTE(review): per the VSURF documentation this is the mean OOB error of
# forests built on all variables - confirm before quoting it.
vsurf.mod[["mean.perf"]]
[1] 0.2337747

Importance

# Variable-importance table. `imp.mean.dec.ind` holds the predictor positions
# that match the importances in `imp.mean.dec` and `imp.sd.dec`.
vi <- data.frame(
  Variable = colnames(rfdata[, -1])[vsurf.mod[["imp.mean.dec.ind"]]],
  Importance = vsurf.mod[["imp.mean.dec"]],
  sd_Importance = vsurf.mod[["imp.sd.dec"]]
)

# Show both numeric columns rounded to five decimals.
mutate(
  vi,
  across(c(Importance, sd_Importance), \(value) round(value, digits = 5))
)
                  Variable Importance sd_Importance
1                Ethnicity    0.03111       0.00084
2                      Age    0.01877       0.00053
3     English.Difficulties    0.01638       0.00072
4     Full.Time.Employment    0.01598       0.00078
5        Helpful.Community    0.01584       0.00086
6                 Religion    0.01454       0.00067
7              See.Friends    0.01239       0.00062
8           Discrimination    0.01230       0.00064
9                Get.Along    0.01030       0.00057
10          Helpful.Family    0.00993       0.00059
11              Expression    0.00910       0.00071
12    Religious.Importance    0.00887       0.00043
13         Helpful.Friends    0.00864       0.00055
14        English.Speaking    0.00848       0.00054
15         Community.Trust    0.00848       0.00062
16              See.Family    0.00818       0.00061
17    Religious.Attendance    0.00803       0.00043
18            Close.Family    0.00789       0.00063
19 Community.Shares.Values    0.00765       0.00028
20          Family.Respect    0.00679       0.00067
21    Close.knit.Community    0.00678       0.00032
22       Successful.Family    0.00649       0.00035
23              Feel.Close    0.00576       0.00033
24           Close.Friends    0.00543       0.00073
25          Similar.Values    0.00465       0.00035
26                 Loyalty    0.00464       0.00043
27     Spend.Time.Together    0.00458       0.00036
28                   Trust    0.00457       0.00035
29            Togetherness    0.00436       0.00037
30            Family.Pride    0.00329       0.00028
31                  Gender    0.00310       0.00028
32        Dental.Insurance    0.00284       0.00028
33           Income_median    0.00238       0.00023
34        Health.Insurance    0.00163       0.00015

Logistic regression (Interpretation)

# Keep the outcome plus the variables VSURF retained at its prediction step.
# NOTE(review): the section heading says "Interpretation" but the selection
# used here is `varselect.pred` - confirm which set is intended.
lr <- select(
  rfdata,
  Family,
  all_of(colnames(rfdata[, -1])[vsurf.mod[["varselect.pred"]]])
)

# Logistic regression fitted on rfdata (the rows before ROSE resampling).
lr_mod <- glm(Family ~ ., family = binomial, data = lr)
summary(lr_mod)

Call:
glm(formula = Family ~ ., family = binomial, data = lr)

Coefficients:
                                           Estimate Std. Error z value Pr(>|z|)
(Intercept)                                0.462345   0.621522   0.744 0.456942
EthnicityAsian Indian                     -0.187291   0.292473  -0.640 0.521930
EthnicityFilipino                         -0.355945   0.222263  -1.601 0.109276
EthnicityKorean                           -0.682400   0.166699  -4.094 4.25e-05
EthnicityOther                            -0.746636   0.246686  -3.027 0.002473
EthnicityVietnamese                       -0.669960   0.182331  -3.674 0.000238
Age                                       -0.014485   0.003323  -4.359 1.31e-05
English.DifficultiesMuch                   0.497108   0.163062   3.049 0.002299
English.DifficultiesNot much               0.062462   0.150904   0.414 0.678932
English.DifficultiesVery much             -0.224421   0.146238  -1.535 0.124876
Full.Time.EmploymentEmployed full time    -0.417455   0.100582  -4.150 3.32e-05
Helpful.CommunityAgree                     1.094653   0.555671   1.970 0.048842
Helpful.CommunityDisagree                  1.442807   0.551394   2.617 0.008880
Helpful.CommunityNeutral                   1.115089   0.546111   2.042 0.041164
Helpful.CommunityStrongly agree            1.286134   0.596826   2.155 0.031165
ReligionBuddhist                           0.164853   0.213445   0.772 0.439910
ReligionCatholic                           0.052290   0.229066   0.228 0.819432
ReligionHindu                             -0.290346   0.323946  -0.896 0.370103
ReligionMuslim                            -0.261756   0.403336  -0.649 0.516353
ReligionOther                             -0.329418   0.417405  -0.789 0.429993
ReligionProtestant                        -0.070350   0.215719  -0.326 0.744335
Get.AlongAgree                             0.455990   0.580675   0.785 0.432292
Get.AlongDisagree                          0.424073   0.561892   0.755 0.450415
Get.AlongNeutral                           0.308745   0.569997   0.542 0.588052
Get.AlongStrongly agree                    0.470386   0.636725   0.739 0.460054
ExpressionSomewhat agree                   0.155592   0.375439   0.414 0.678561
ExpressionSomewhat disagree                0.366586   0.377331   0.972 0.331288
ExpressionStrongly agree                  -0.030690   0.391770  -0.078 0.937561
Religious.ImportanceNot very important     0.100784   0.199292   0.506 0.613061
Religious.ImportanceSomewhat important    -0.004287   0.216124  -0.020 0.984173
Religious.ImportanceVery important        -0.172505   0.233551  -0.739 0.460138
English.SpeakingNot well                  -0.060861   0.250295  -0.243 0.807883
English.SpeakingVery well                  0.130977   0.261275   0.501 0.616159
English.SpeakingWell                      -0.105323   0.249102  -0.423 0.672433
Community.TrustAgree                      -0.105091   0.462910  -0.227 0.820407
Community.TrustDisagree                   -0.110171   0.441164  -0.250 0.802798
Community.TrustNeutral                    -0.163027   0.450399  -0.362 0.717381
Community.TrustStrongly agree             -0.029100   0.535987  -0.054 0.956703
Religious.AttendanceNever                 -0.338172   0.189725  -1.782 0.074678
Religious.AttendanceOnce or twice a month -0.010424   0.175749  -0.059 0.952704
Religious.AttendanceSeldom                -0.282600   0.183881  -1.537 0.124327
Family.RespectSomewhat agree              -0.495115   0.559476  -0.885 0.376177
Family.RespectSomewhat disagree           -0.576795   0.555232  -1.039 0.298881
Family.RespectStrongly agree              -0.349201   0.569423  -0.613 0.539709
Close.knit.CommunityAgree                 -0.640598   0.411912  -1.555 0.119903
Close.knit.CommunityDisagree              -0.744618   0.409129  -1.820 0.068758
Close.knit.CommunityNeutral               -0.666998   0.402707  -1.656 0.097664
Close.knit.CommunityStrongly agree        -0.837243   0.458592  -1.826 0.067898
Successful.FamilySomewhat agree            0.694712   0.581141   1.195 0.231920
Successful.FamilySomewhat disagree         0.571446   0.582572   0.981 0.326641
Successful.FamilyStrongly agree            1.004185   0.596320   1.684 0.092188
LoyaltySomewhat agree                     -0.230806   0.644665  -0.358 0.720325
LoyaltySomewhat disagree                   0.017210   0.644853   0.027 0.978709
LoyaltyStrongly agree                     -0.280903   0.654361  -0.429 0.667720
                                             
(Intercept)                                  
EthnicityAsian Indian                        
EthnicityFilipino                            
EthnicityKorean                           ***
EthnicityOther                            ** 
EthnicityVietnamese                       ***
Age                                       ***
English.DifficultiesMuch                  ** 
English.DifficultiesNot much                 
English.DifficultiesVery much                
Full.Time.EmploymentEmployed full time    ***
Helpful.CommunityAgree                    *  
Helpful.CommunityDisagree                 ** 
Helpful.CommunityNeutral                  *  
Helpful.CommunityStrongly agree           *  
ReligionBuddhist                             
ReligionCatholic                             
ReligionHindu                                
ReligionMuslim                               
ReligionOther                                
ReligionProtestant                           
Get.AlongAgree                               
Get.AlongDisagree                            
Get.AlongNeutral                             
Get.AlongStrongly agree                      
ExpressionSomewhat agree                     
ExpressionSomewhat disagree                  
ExpressionStrongly agree                     
Religious.ImportanceNot very important       
Religious.ImportanceSomewhat important       
Religious.ImportanceVery important           
English.SpeakingNot well                     
English.SpeakingVery well                    
English.SpeakingWell                         
Community.TrustAgree                         
Community.TrustDisagree                      
Community.TrustNeutral                       
Community.TrustStrongly agree                
Religious.AttendanceNever                 .  
Religious.AttendanceOnce or twice a month    
Religious.AttendanceSeldom                   
Family.RespectSomewhat agree                 
Family.RespectSomewhat disagree              
Family.RespectStrongly agree                 
Close.knit.CommunityAgree                    
Close.knit.CommunityDisagree              .  
Close.knit.CommunityNeutral               .  
Close.knit.CommunityStrongly agree        .  
Successful.FamilySomewhat agree              
Successful.FamilySomewhat disagree           
Successful.FamilyStrongly agree           .  
LoyaltySomewhat agree                        
LoyaltySomewhat disagree                     
LoyaltyStrongly agree                        
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 2667.5  on 1925  degrees of freedom
Residual deviance: 2522.2  on 1872  degrees of freedom
AIC: 2630.2

Number of Fisher Scoring iterations: 4
# Per-term analysis-of-deviance tests for the logistic model.
lr_mod |> car::Anova()
Analysis of Deviance Table (Type II tests)

Response: Family
                     LR Chisq Df Pr(>Chisq)    
Ethnicity             26.0657  5  8.666e-05 ***
Age                   19.1896  1  1.184e-05 ***
English.Difficulties  19.9895  3  0.0001706 ***
Full.Time.Employment  17.3306  1  3.141e-05 ***
Helpful.Community      8.4105  4  0.0776476 .  
Religion               3.3316  6  0.7662166    
Get.Along              1.6094  4  0.8071070    
Expression             3.5196  3  0.3182353    
Religious.Importance   2.6403  3  0.4504635    
English.Speaking       3.0835  3  0.3789242    
Community.Trust        0.3619  4  0.9854745    
Religious.Attendance   5.6145  3  0.1319463    
Family.Respect         2.0952  3  0.5528927    
Close.knit.Community   4.1280  4  0.3889646    
Successful.Family      5.4185  3  0.1435970    
Loyalty                1.0368  3  0.7923476    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Exponentiated coefficients (odds ratios) with confidence intervals, shown as
# an interactive table. TRUE is spelled out because `T` can be reassigned.
broom::tidy(lr_mod, exponentiate = TRUE, conf.int = TRUE) |>
  DT::datatable()

Health Professionals

# Modelling frame for the `Heal Professionals` outcome: select the outcome and
# candidate predictors, then remove incomplete rows.
rfdata <- qol |>
  select(
    `Heal Professionals`, Ethnicity, Age, Gender, Religion,
    `Full Time Employment`, Income_median, `English Speaking`,
    `English Difficulties`, `See Family`:`Community Trust`,
    `Health Insurance`, `Dental Insurance`, `Discrimination`
  ) |>
  na.omit() |>
  as.data.frame() |>
  # Syntactic column names so the variables can be used in formulas.
  rename_with(make.names)

# Draw a ROSE sample from the modelling frame (fixed seed) and keep its data.
imbal <- ROSE::ROSE(Heal.Professionals ~ ., data = rfdata, seed = 3)$data

# Variable selection with VSURF on the ROSE sample.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, and left-assignment keeps the target name visible first.
vsurf.mod <- VSURF(
  Heal.Professionals ~ .,
  imbal,
  na.action = "na.omit",
  parallel = TRUE,
  verbose = FALSE
)
Warning in VSURF.formula(Heal.Professionals ~ ., imbal, na.action = "na.omit", : VSURF with a formula-type call outputs selected variables
which are indices of the input matrix based on the formula:
you may reorder these to get indices of the original data
# Timing and number of variables kept at each VSURF step.
summary(vsurf.mod)

 VSURF computation time: 22.3 secs 

 VSURF selected: 
    34 variables at thresholding step (in 8.4 secs)
    14 variables at interpretation step (in 6.8 secs)
    2 variables at prediction step (in 7.2 secs)

 VSURF ran in parallel on a PSOCK cluster and used 15 cores 
# Names of the variables kept at the prediction step. The indices refer to the
# predictors only, so they are looked up after dropping the outcome column.
colnames(rfdata[, -1])[vsurf.mod[["varselect.pred"]]]
[1] "English.Speaking" "Dental.Insurance"
# Names of the variables kept at the interpretation step (outcome column
# dropped before indexing).
colnames(rfdata[, -1])[vsurf.mod[["varselect.interp"]]]
 [1] "English.Speaking"     "Ethnicity"            "Dental.Insurance"    
 [4] "Religion"             "English.Difficulties" "Health.Insurance"    
 [7] "Community.Trust"      "Religious.Attendance" "Get.Along"           
[10] "Helpful.Friends"      "Helpful.Community"    "See.Family"          
[13] "Close.knit.Community" "Income_median"       
# Diagnostic plots for the VSURF run.
vsurf.mod |> plot()

# NOTE(review): per the VSURF documentation this is the mean OOB error of
# forests built on all variables - confirm before quoting it.
vsurf.mod[["mean.perf"]]
[1] 0.1991697

Importance

# Variable-importance table. `imp.mean.dec.ind` holds the predictor positions
# that match the importances in `imp.mean.dec` and `imp.sd.dec`.
vi <- data.frame(
  Variable = colnames(rfdata[, -1])[vsurf.mod[["imp.mean.dec.ind"]]],
  Importance = vsurf.mod[["imp.mean.dec"]],
  sd_Importance = vsurf.mod[["imp.sd.dec"]]
)

# Show both numeric columns rounded to five decimals.
mutate(
  vi,
  across(c(Importance, sd_Importance), \(value) round(value, digits = 5))
)
                  Variable Importance sd_Importance
1         English.Speaking    0.03496       0.00138
2                Ethnicity    0.03188       0.00101
3         Dental.Insurance    0.03173       0.00116
4                 Religion    0.02632       0.00066
5     English.Difficulties    0.02255       0.00106
6         Health.Insurance    0.01514       0.00065
7          Community.Trust    0.01460       0.00052
8     Religious.Attendance    0.01350       0.00054
9                Get.Along    0.01327       0.00054
10         Helpful.Friends    0.01202       0.00050
11       Helpful.Community    0.01201       0.00061
12              See.Family    0.01171       0.00050
13    Close.knit.Community    0.01094       0.00040
14           Income_median    0.01079       0.00053
15             See.Friends    0.01062       0.00043
16 Community.Shares.Values    0.00995       0.00047
17     Spend.Time.Together    0.00886       0.00056
18    Religious.Importance    0.00883       0.00052
19          Helpful.Family    0.00768       0.00064
20            Close.Family    0.00755       0.00052
21           Close.Friends    0.00660       0.00050
22                     Age    0.00646       0.00036
23          Discrimination    0.00532       0.00040
24          Family.Respect    0.00529       0.00035
25       Successful.Family    0.00522       0.00033
26            Family.Pride    0.00516       0.00035
27              Feel.Close    0.00494       0.00035
28              Expression    0.00491       0.00031
29                   Trust    0.00449       0.00029
30                 Loyalty    0.00431       0.00035
31          Similar.Values    0.00406       0.00036
32            Togetherness    0.00371       0.00021
33                  Gender    0.00317       0.00024
34    Full.Time.Employment    0.00300       0.00029

Logistic regression (Interpretation)

# Keep the outcome plus the variables VSURF retained at its prediction step.
# NOTE(review): the section heading says "Interpretation" but the selection
# used here is `varselect.pred` - confirm which set is intended.
lr <- select(
  rfdata,
  Heal.Professionals,
  all_of(colnames(rfdata[, -1])[vsurf.mod[["varselect.pred"]]])
)

# Logistic regression fitted on rfdata (the rows before ROSE resampling).
lr_mod <- glm(Heal.Professionals ~ ., family = binomial, data = lr)
summary(lr_mod)

Call:
glm(formula = Heal.Professionals ~ ., family = binomial, data = lr)

Coefficients:
                          Estimate Std. Error z value Pr(>|z|)    
(Intercept)                -1.0810     0.2293  -4.714 2.43e-06 ***
English.SpeakingNot well    0.3027     0.2488   1.217  0.22363    
English.SpeakingVery well   1.2561     0.2448   5.132 2.87e-07 ***
English.SpeakingWell        0.8592     0.2444   3.515  0.00044 ***
Dental.InsuranceYes         0.5217     0.1007   5.180 2.22e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 2668.3  on 1926  degrees of freedom
Residual deviance: 2530.9  on 1922  degrees of freedom
AIC: 2540.9

Number of Fisher Scoring iterations: 4
# Per-term analysis-of-deviance tests for the logistic model.
lr_mod |> car::Anova()
Analysis of Deviance Table (Type II tests)

Response: Heal.Professionals
                 LR Chisq Df Pr(>Chisq)    
English.Speaking   70.887  3  2.756e-15 ***
Dental.Insurance   26.820  1  2.234e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Exponentiated coefficients (odds ratios) with confidence intervals, shown as
# an interactive table. TRUE is spelled out because `T` can be reassigned.
broom::tidy(lr_mod, exponentiate = TRUE, conf.int = TRUE) |>
  DT::datatable()

Physical Check-up

# install.packages("randomForestSRC")

# Modelling frame for the `Physical Check-up` outcome: select the outcome and
# candidate predictors, remove incomplete rows, and shorten three predictor
# names before making all names syntactic.
rfdata <- qol |>
  select(
    `Physical Check-up`, Ethnicity, Age, Gender, Religion,
    `Full Time Employment`, Income_median, `English Speaking`,
    `English Difficulties`, `See Family`:`Community Trust`,
    `Health Insurance`, `Dental Insurance`, `Discrimination`
  ) |>
  # Native pipe here as well, matching the rest of the file.
  na.omit() |>
  rename(
    Employment = `Full Time Employment`,
    EnglishSpeak = `English Speaking`,
    EnglishDiff = `English Difficulties`
  ) |>
  as.data.frame() |>
  # Syntactic column names so the variables can be used in formulas.
  rename_with(make.names)

# Draw a ROSE sample from the modelling frame (fixed seed) and keep its data.
imbal <- ROSE::ROSE(Physical.Check.up ~ ., data = rfdata, seed = 3)$data

# Variable selection with VSURF on the ROSE sample.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, and left-assignment keeps the target name visible first.
vsurf.mod <- VSURF(
  Physical.Check.up ~ .,
  imbal,
  na.action = "na.omit",
  parallel = TRUE,
  verbose = FALSE
)
Warning in VSURF.formula(Physical.Check.up ~ ., imbal, na.action = "na.omit", : VSURF with a formula-type call outputs selected variables
which are indices of the input matrix based on the formula:
you may reorder these to get indices of the original data
# Timing and number of variables kept at each VSURF step.
summary(vsurf.mod)

 VSURF computation time: 23.2 secs 

 VSURF selected: 
    34 variables at thresholding step (in 8.2 secs)
    15 variables at interpretation step (in 6.5 secs)
    3 variables at prediction step (in 8.5 secs)

 VSURF ran in parallel on a PSOCK cluster and used 15 cores 
# Names of the variables kept at the prediction step. The indices refer to the
# predictors only, so they are looked up after dropping the outcome column.
colnames(rfdata[, -1])[vsurf.mod[["varselect.pred"]]]
[1] "Dental.Insurance" "Health.Insurance" "Age"             
# Names of the variables kept at the interpretation step (outcome column
# dropped before indexing).
colnames(rfdata[, -1])[vsurf.mod[["varselect.interp"]]]
 [1] "Dental.Insurance"        "Health.Insurance"       
 [3] "Age"                     "Ethnicity"              
 [5] "Religion"                "Income_median"          
 [7] "EnglishDiff"             "Close.knit.Community"   
 [9] "EnglishSpeak"            "Community.Trust"        
[11] "Get.Along"               "Community.Shares.Values"
[13] "Religious.Attendance"    "Helpful.Community"      
[15] "Gender"                 
# Diagnostic plots for the VSURF run.
vsurf.mod |> plot()

# NOTE(review): per the VSURF documentation this is the mean OOB error of
# forests built on all variables - confirm before quoting it.
vsurf.mod[["mean.perf"]]
[1] 0.149244

Importance

# Variable-importance table. `imp.mean.dec.ind` holds the predictor positions
# that match the importances in `imp.mean.dec` and `imp.sd.dec`.
vi <- data.frame(
  Variable = colnames(rfdata[, -1])[vsurf.mod[["imp.mean.dec.ind"]]],
  Importance = vsurf.mod[["imp.mean.dec"]],
  sd_Importance = vsurf.mod[["imp.sd.dec"]]
)

# Show both numeric columns rounded to five decimals.
mutate(
  vi,
  across(c(Importance, sd_Importance), \(value) round(value, digits = 5))
)
                  Variable Importance sd_Importance
1         Dental.Insurance    0.06225       0.00146
2         Health.Insurance    0.04407       0.00118
3                      Age    0.04351       0.00102
4                Ethnicity    0.03320       0.00091
5                 Religion    0.02402       0.00069
6            Income_median    0.02351       0.00132
7              EnglishDiff    0.02089       0.00080
8     Close.knit.Community    0.01636       0.00058
9             EnglishSpeak    0.01566       0.00069
10         Community.Trust    0.01557       0.00059
11               Get.Along    0.01520       0.00061
12 Community.Shares.Values    0.01495       0.00072
13    Religious.Attendance    0.01167       0.00040
14       Helpful.Community    0.01131       0.00043
15                  Gender    0.01114       0.00057
16          Discrimination    0.01055       0.00058
17              Expression    0.00986       0.00049
18    Religious.Importance    0.00914       0.00041
19              See.Family    0.00851       0.00054
20       Successful.Family    0.00845       0.00042
21          Helpful.Family    0.00780       0.00053
22           Close.Friends    0.00778       0.00040
23            Close.Family    0.00748       0.00055
24             See.Friends    0.00733       0.00046
25     Spend.Time.Together    0.00717       0.00046
26         Helpful.Friends    0.00663       0.00056
27          Similar.Values    0.00657       0.00027
28          Family.Respect    0.00623       0.00029
29            Family.Pride    0.00548       0.00022
30              Feel.Close    0.00538       0.00029
31                 Loyalty    0.00513       0.00023
32                   Trust    0.00467       0.00038
33              Employment    0.00449       0.00030
34            Togetherness    0.00429       0.00029

Logistic regression (Interpretation)

# Keep the outcome plus the variables VSURF retained at its prediction step.
# NOTE(review): the section heading says "Interpretation" but the selection
# used here is `varselect.pred` - confirm which set is intended.
lr <- select(
  rfdata,
  Physical.Check.up,
  all_of(colnames(rfdata[, -1])[vsurf.mod[["varselect.pred"]]])
)

# Logistic regression fitted on rfdata (the rows before ROSE resampling).
lr_mod <- glm(Physical.Check.up ~ ., family = binomial, data = lr)
summary(lr_mod)

Call:
glm(formula = Physical.Check.up ~ ., family = binomial, data = lr)

Coefficients:
                     Estimate Std. Error z value Pr(>|z|)    
(Intercept)         -1.927796   0.204799  -9.413  < 2e-16 ***
Dental.InsuranceYes  0.884275   0.115918   7.628 2.38e-14 ***
Health.InsuranceYes  1.195771   0.159916   7.477 7.58e-14 ***
Age                  0.028965   0.003435   8.434  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 2405.0  on 1917  degrees of freedom
Residual deviance: 2148.8  on 1914  degrees of freedom
AIC: 2156.8

Number of Fisher Scoring iterations: 4
# Per-term analysis-of-deviance tests for the logistic model.
lr_mod |> car::Anova()
Analysis of Deviance Table (Type II tests)

Response: Physical.Check.up
                 LR Chisq Df Pr(>Chisq)    
Dental.Insurance   57.892  1  2.769e-14 ***
Health.Insurance   58.248  1  2.311e-14 ***
Age                76.835  1  < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Exponentiated coefficients (odds ratios) with confidence intervals, shown as
# an interactive table. TRUE is spelled out because `T` can be reassigned.
broom::tidy(lr_mod, exponentiate = TRUE, conf.int = TRUE) |>
  DT::datatable()

Dental Check-up

# Modelling frame for the `Dentist Check-up` outcome: select the outcome and
# candidate predictors, then remove incomplete rows.
rfdata <- qol |>
  select(
    `Dentist Check-up`, Ethnicity, Age, Gender, Religion,
    `Full Time Employment`, Income_median, `English Speaking`,
    `English Difficulties`, `See Family`:`Community Trust`,
    `Health Insurance`, `Dental Insurance`, `Discrimination`
  ) |>
  na.omit() |>
  as.data.frame() |>
  # Syntactic column names so the variables can be used in formulas.
  rename_with(make.names)

# Draw a ROSE sample from the modelling frame (fixed seed) and keep its data.
imbal <- ROSE::ROSE(Dentist.Check.up ~ ., data = rfdata, seed = 3)$data

# Variable selection with VSURF on the ROSE sample.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, and left-assignment keeps the target name visible first.
vsurf.mod <- VSURF(
  Dentist.Check.up ~ .,
  imbal,
  na.action = "na.omit",
  parallel = TRUE,
  verbose = FALSE
)
Warning in VSURF.formula(Dentist.Check.up ~ ., imbal, na.action = "na.omit", : VSURF with a formula-type call outputs selected variables
which are indices of the input matrix based on the formula:
you may reorder these to get indices of the original data
# Timing and number of variables kept at each VSURF step.
summary(vsurf.mod)

 VSURF computation time: 26.4 secs 

 VSURF selected: 
    34 variables at thresholding step (in 8 secs)
    16 variables at interpretation step (in 6.4 secs)
    10 variables at prediction step (in 12 secs)

 VSURF ran in parallel on a PSOCK cluster and used 15 cores 
# Names of the variables kept at the prediction step. The indices refer to the
# predictors only, so they are looked up after dropping the outcome column.
colnames(rfdata[, -1])[vsurf.mod[["varselect.pred"]]]
 [1] "Dental.Insurance"        "Ethnicity"              
 [3] "English.Speaking"        "Community.Trust"        
 [5] "See.Family"              "See.Friends"            
 [7] "Community.Shares.Values" "Close.knit.Community"   
 [9] "Get.Along"               "Religious.Attendance"   
# Names of the variables kept at the interpretation step (outcome column
# dropped before indexing).
colnames(rfdata[, -1])[vsurf.mod[["varselect.interp"]]]
 [1] "Dental.Insurance"        "Ethnicity"              
 [3] "Religion"                "Income_median"          
 [5] "Age"                     "Health.Insurance"       
 [7] "Religious.Importance"    "English.Difficulties"   
 [9] "English.Speaking"        "Community.Trust"        
[11] "See.Family"              "See.Friends"            
[13] "Community.Shares.Values" "Close.knit.Community"   
[15] "Get.Along"               "Religious.Attendance"   
# Diagnostic plots for the VSURF run.
vsurf.mod |> plot()

# NOTE(review): per the VSURF documentation this is the mean OOB error of
# forests built on all variables - confirm before quoting it.
vsurf.mod[["mean.perf"]]
[1] 0.1689817

Importance

# Variable-importance table. `imp.mean.dec.ind` holds the predictor positions
# that match the importances in `imp.mean.dec` and `imp.sd.dec`.
vi <- data.frame(
  Variable = colnames(rfdata[, -1])[vsurf.mod[["imp.mean.dec.ind"]]],
  Importance = vsurf.mod[["imp.mean.dec"]],
  sd_Importance = vsurf.mod[["imp.sd.dec"]]
)

# Show both numeric columns rounded to five decimals.
mutate(
  vi,
  across(c(Importance, sd_Importance), \(value) round(value, digits = 5))
)
                  Variable Importance sd_Importance
1         Dental.Insurance    0.07975       0.00124
2                Ethnicity    0.03747       0.00087
3                 Religion    0.03295       0.00094
4            Income_median    0.02204       0.00065
5                      Age    0.01849       0.00052
6         Health.Insurance    0.01589       0.00069
7     Religious.Importance    0.01535       0.00067
8     English.Difficulties    0.01519       0.00048
9         English.Speaking    0.01454       0.00050
10         Community.Trust    0.01400       0.00060
11              See.Family    0.01361       0.00059
12             See.Friends    0.01179       0.00059
13 Community.Shares.Values    0.00951       0.00046
14    Close.knit.Community    0.00930       0.00055
15               Get.Along    0.00908       0.00051
16    Religious.Attendance    0.00853       0.00039
17            Close.Family    0.00810       0.00058
18          Discrimination    0.00794       0.00038
19       Helpful.Community    0.00787       0.00050
20                  Gender    0.00746       0.00069
21           Close.Friends    0.00731       0.00044
22     Spend.Time.Together    0.00727       0.00041
23              Feel.Close    0.00721       0.00044
24          Helpful.Family    0.00662       0.00055
25         Helpful.Friends    0.00641       0.00039
26              Expression    0.00562       0.00034
27          Similar.Values    0.00548       0.00037
28       Successful.Family    0.00512       0.00035
29                   Trust    0.00423       0.00029
30    Full.Time.Employment    0.00407       0.00040
31            Family.Pride    0.00366       0.00033
32          Family.Respect    0.00359       0.00032
33            Togetherness    0.00322       0.00025
34                 Loyalty    0.00270       0.00022

Logistic regression (Interpretation)

# Keep the outcome plus the variables VSURF retained at its prediction step.
# NOTE(review): the section heading says "Interpretation" but the selection
# used here is `varselect.pred` - confirm which set is intended.
lr <- select(
  rfdata,
  Dentist.Check.up,
  all_of(colnames(rfdata[, -1])[vsurf.mod[["varselect.pred"]]])
)

# Logistic regression fitted on rfdata (the rows before ROSE resampling).
lr_mod <- glm(Dentist.Check.up ~ ., family = binomial, data = lr)
summary(lr_mod)

Call:
glm(formula = Dentist.Check.up ~ ., family = binomial, data = lr)

Coefficients:
                                            Estimate Std. Error z value
(Intercept)                               -1.606e+00  4.957e-01  -3.239
Dental.InsuranceYes                        1.515e+00  1.110e-01  13.642
EthnicityAsian Indian                     -1.409e+00  1.767e-01  -7.974
EthnicityFilipino                         -3.099e-01  2.213e-01  -1.400
EthnicityKorean                           -4.581e-01  1.703e-01  -2.690
EthnicityOther                            -4.781e-01  2.546e-01  -1.878
EthnicityVietnamese                       -1.360e-01  1.756e-01  -0.774
English.SpeakingNot well                   9.322e-01  2.691e-01   3.464
English.SpeakingVery well                  1.354e+00  2.755e-01   4.916
English.SpeakingWell                       1.022e+00  2.695e-01   3.791
Community.TrustAgree                       1.561e-01  4.744e-01   0.329
Community.TrustDisagree                   -1.679e-01  4.497e-01  -0.373
Community.TrustNeutral                     6.964e-02  4.578e-01   0.152
Community.TrustStrongly agree             -3.030e-02  5.712e-01  -0.053
See.Family                                 1.170e-01  4.406e-02   2.655
See.Friends                               -5.455e-06  4.291e-02   0.000
Community.Shares.ValuesAgree               4.233e-01  5.454e-01   0.776
Community.Shares.ValuesDisagree            2.318e-01  5.392e-01   0.430
Community.Shares.ValuesNeutral             2.772e-01  5.351e-01   0.518
Community.Shares.ValuesStrongly agree      1.346e-01  6.164e-01   0.218
Close.knit.CommunityAgree                 -2.303e-01  3.875e-01  -0.594
Close.knit.CommunityDisagree               5.051e-02  3.917e-01   0.129
Close.knit.CommunityNeutral               -2.009e-01  3.776e-01  -0.532
Close.knit.CommunityStrongly agree         7.670e-01  4.499e-01   1.705
Get.AlongAgree                            -3.445e-02  6.921e-01  -0.050
Get.AlongDisagree                         -2.498e-02  6.663e-01  -0.037
Get.AlongNeutral                           3.748e-02  6.809e-01   0.055
Get.AlongStrongly agree                   -5.985e-01  7.565e-01  -0.791
Religious.AttendanceNever                 -1.558e-01  1.860e-01  -0.837
Religious.AttendanceOnce or twice a month -6.317e-03  1.792e-01  -0.035
Religious.AttendanceSeldom                -1.515e-01  1.954e-01  -0.776
                                          Pr(>|z|)    
(Intercept)                               0.001197 ** 
Dental.InsuranceYes                        < 2e-16 ***
EthnicityAsian Indian                     1.53e-15 ***
EthnicityFilipino                         0.161515    
EthnicityKorean                           0.007139 ** 
EthnicityOther                            0.060366 .  
EthnicityVietnamese                       0.438694    
English.SpeakingNot well                  0.000531 ***
English.SpeakingVery well                 8.85e-07 ***
English.SpeakingWell                      0.000150 ***
Community.TrustAgree                      0.742042    
Community.TrustDisagree                   0.708798    
Community.TrustNeutral                    0.879093    
Community.TrustStrongly agree             0.957705    
See.Family                                0.007928 ** 
See.Friends                               0.999899    
Community.Shares.ValuesAgree              0.437676    
Community.Shares.ValuesDisagree           0.667225    
Community.Shares.ValuesNeutral            0.604424    
Community.Shares.ValuesStrongly agree     0.827147    
Close.knit.CommunityAgree                 0.552317    
Close.knit.CommunityDisagree              0.897407    
Close.knit.CommunityNeutral               0.594612    
Close.knit.CommunityStrongly agree        0.088228 .  
Get.AlongAgree                            0.960305    
Get.AlongDisagree                         0.970094    
Get.AlongNeutral                          0.956103    
Get.AlongStrongly agree                   0.428840    
Religious.AttendanceNever                 0.402459    
Religious.AttendanceOnce or twice a month 0.971883    
Religious.AttendanceSeldom                0.437988    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 2593.0  on 1914  degrees of freedom
Residual deviance: 2199.3  on 1884  degrees of freedom
AIC: 2261.3

Number of Fisher Scoring iterations: 4
# Type II likelihood-ratio tests, one row per predictor term.
car::Anova(lr_mod, type = "II", test.statistic = "LR")
Analysis of Deviance Table (Type II tests)

Response: Dentist.Check.up
                        LR Chisq Df Pr(>Chisq)    
Dental.Insurance         197.515  1  < 2.2e-16 ***
Ethnicity                 82.944  5  < 2.2e-16 ***
English.Speaking          27.657  3  4.286e-06 ***
Community.Trust            2.021  4   0.731859    
See.Family                 7.067  1   0.007850 ** 
See.Friends                0.000  1   0.999899    
Community.Shares.Values    1.964  4   0.742462    
Close.knit.Community      16.557  4   0.002356 ** 
Get.Along                  3.327  4   0.504609    
Religious.Attendance       1.826  3   0.609189    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tabulate the coefficients on the exponentiated scale together with their
# confidence intervals, and render them as an interactive table.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
broom::tidy(lr_mod, exponentiate = TRUE, conf.int = TRUE) |>
  DT::datatable()

Folkmedicine

# Assemble the modelling frame for the Folkmedicine outcome: response first,
# then the candidate predictors. Rows containing any missing value are dropped
# and column names are made syntactic so they can be used in formulas.
rfdata <- qol |>
  select(
    Folkmedicine, Ethnicity, Age, Gender, Religion,
    `Full Time Employment`, Income_median,
    `English Speaking`, `English Difficulties`,
    `See Family`:`Community Trust`,
    `Health Insurance`, `Dental Insurance`, Discrimination
  ) |>
  na.omit() |>
  as.data.frame() |>
  rename_with(make.names)

# Resample the modelling frame with ROSE (fixed seed) and keep only the
# resulting data frame.
imbal <- ROSE::ROSE(
  formula = Folkmedicine ~ .,
  data = rfdata,
  seed = 3
)$data

# Run VSURF variable selection for Folkmedicine on the ROSE-resampled data.
# With the formula interface the selected-variable indices refer to the
# predictor columns in formula order, i.e. the data columns minus the response.
# Left assignment and TRUE/FALSE replace the original `->` and `T`/`F`.
vsurf.mod <- VSURF(
  Folkmedicine ~ ., imbal,
  na.action = "na.omit",
  parallel = TRUE,
  verbose = FALSE
)
Warning in VSURF.formula(Folkmedicine ~ ., imbal, na.action = "na.omit", : VSURF with a formula-type call outputs selected variables
which are indices of the input matrix based on the formula:
you may reorder these to get indices of the original data
# Report timing and the number of variables kept at each VSURF step.
summary(vsurf.mod)

 VSURF computation time: 57.7 secs 

 VSURF selected: 
    34 variables at thresholding step (in 7.6 secs)
    33 variables at interpretation step (in 7.1 secs)
    20 variables at prediction step (in 43 secs)

 VSURF ran in parallel on a PSOCK cluster and used 15 cores 
# Translate prediction-step indices into predictor names
# (the response is column 1, so it is dropped before indexing).
names(rfdata)[-1][vsurf.mod$varselect.pred]
 [1] "Ethnicity"               "Age"                    
 [3] "Religion"                "Full.Time.Employment"   
 [5] "Religious.Attendance"    "English.Difficulties"   
 [7] "Get.Along"               "Religious.Importance"   
 [9] "Helpful.Community"       "Community.Shares.Values"
[11] "Community.Trust"         "Close.knit.Community"   
[13] "Feel.Close"              "Similar.Values"         
[15] "Togetherness"            "Gender"                 
[17] "Family.Respect"          "Loyalty"                
[19] "Income_median"           "Dental.Insurance"       
# Translate interpretation-step indices into predictor names
# (the response is column 1, so it is dropped before indexing).
names(rfdata)[-1][vsurf.mod$varselect.interp]
 [1] "Ethnicity"               "Age"                    
 [3] "Religion"                "English.Speaking"       
 [5] "Full.Time.Employment"    "Religious.Attendance"   
 [7] "English.Difficulties"    "Get.Along"              
 [9] "Religious.Importance"    "Helpful.Community"      
[11] "Discrimination"          "Community.Shares.Values"
[13] "Community.Trust"         "Close.knit.Community"   
[15] "See.Family"              "Feel.Close"             
[17] "Helpful.Friends"         "Close.Family"           
[19] "See.Friends"             "Helpful.Family"         
[21] "Similar.Values"          "Togetherness"           
[23] "Expression"              "Spend.Time.Together"    
[25] "Close.Friends"           "Gender"                 
[27] "Trust"                   "Successful.Family"      
[29] "Family.Pride"            "Family.Respect"         
[31] "Loyalty"                 "Income_median"          
[33] "Dental.Insurance"       
# Draw the default diagnostic plot for the fitted VSURF object.
plot(vsurf.mod)

# NOTE(review): presumably the mean OOB error of forests grown on all
# variables — confirm against the VSURF documentation.
vsurf.mod[["mean.perf"]]
[1] 0.112059

Importance

# Variable-importance table: predictor name, mean importance and its standard
# deviation, in the order VSURF stores them (decreasing mean importance).
vi <- with(
  vsurf.mod,
  data.frame(
    Variable = names(rfdata)[-1][imp.mean.dec.ind],
    Importance = imp.mean.dec,
    sd_Importance = imp.sd.dec
  )
)

# Print the importance table with both numeric columns rounded to 5 decimals.
mutate(
  vi,
  across(c(Importance, sd_Importance), \(x) round(x, digits = 5))
)
                  Variable Importance sd_Importance
1                Ethnicity    0.07012       0.00168
2                      Age    0.04766       0.00106
3                 Religion    0.04361       0.00080
4         English.Speaking    0.03855       0.00136
5     Full.Time.Employment    0.03009       0.00155
6     Religious.Attendance    0.02845       0.00118
7     English.Difficulties    0.02418       0.00075
8                Get.Along    0.02316       0.00063
9     Religious.Importance    0.02066       0.00079
10       Helpful.Community    0.01931       0.00097
11          Discrimination    0.01919       0.00090
12 Community.Shares.Values    0.01862       0.00066
13         Community.Trust    0.01835       0.00059
14    Close.knit.Community    0.01702       0.00054
15              See.Family    0.01410       0.00039
16              Feel.Close    0.01380       0.00082
17         Helpful.Friends    0.01350       0.00060
18            Close.Family    0.01290       0.00042
19             See.Friends    0.01254       0.00058
20          Helpful.Family    0.01245       0.00055
21          Similar.Values    0.01168       0.00038
22            Togetherness    0.01091       0.00051
23              Expression    0.01085       0.00041
24     Spend.Time.Together    0.00979       0.00037
25           Close.Friends    0.00962       0.00031
26                  Gender    0.00920       0.00051
27                   Trust    0.00888       0.00038
28       Successful.Family    0.00844       0.00041
29            Family.Pride    0.00814       0.00039
30          Family.Respect    0.00783       0.00043
31                 Loyalty    0.00683       0.00041
32           Income_median    0.00661       0.00033
33        Dental.Insurance    0.00514       0.00041
34        Health.Insurance    0.00301       0.00022

Logistic regression (Interpretation)

# Keep the outcome plus the predictors retained at the VSURF prediction step,
# taken from the original (non-resampled) modelling frame.
lr <- rfdata |>
  select(
    Folkmedicine,
    all_of(names(rfdata)[-1][vsurf.mod$varselect.pred])
  )

# Logistic regression of Folkmedicine on every selected predictor.
lr_mod <- glm(
  formula = Folkmedicine ~ .,
  family = binomial,
  data = lr
)
# Coefficient table, deviances and AIC.
summary(lr_mod)

Call:
glm(formula = Folkmedicine ~ ., family = binomial, data = lr)

Coefficients:
                                           Estimate Std. Error z value Pr(>|z|)
(Intercept)                               -2.640291   0.896661  -2.945  0.00323
EthnicityAsian Indian                     -0.153457   0.460432  -0.333  0.73892
EthnicityFilipino                         -0.817383   0.354354  -2.307  0.02107
EthnicityKorean                            0.061646   0.216011   0.285  0.77535
EthnicityOther                            -0.755186   0.386572  -1.954  0.05076
EthnicityVietnamese                       -1.337963   0.296822  -4.508 6.56e-06
Age                                        0.026520   0.004552   5.827 5.66e-09
ReligionBuddhist                           0.439710   0.317670   1.384  0.16631
ReligionCatholic                           0.011301   0.363164   0.031  0.97518
ReligionHindu                             -0.400501   0.538575  -0.744  0.45710
ReligionMuslim                            -2.475080   1.135925  -2.179  0.02934
ReligionOther                              0.419359   0.623588   0.672  0.50127
ReligionProtestant                         0.074212   0.322011   0.230  0.81773
Full.Time.EmploymentEmployed full time    -0.254735   0.162907  -1.564  0.11789
Religious.AttendanceNever                  0.049044   0.313793   0.156  0.87580
Religious.AttendanceOnce or twice a month  0.292082   0.289355   1.009  0.31277
Religious.AttendanceSeldom                 0.202232   0.308119   0.656  0.51160
English.DifficultiesMuch                   0.043201   0.214955   0.201  0.84072
English.DifficultiesNot much               0.077021   0.192937   0.399  0.68974
English.DifficultiesVery much             -0.113785   0.235636  -0.483  0.62918
Get.AlongAgree                             0.922242   1.007104   0.916  0.35981
Get.AlongDisagree                          0.514194   0.980497   0.524  0.59999
Get.AlongNeutral                           1.291197   0.990589   1.303  0.19242
Get.AlongStrongly agree                    1.091672   1.090678   1.001  0.31687
Religious.ImportanceNot very important    -0.045192   0.303786  -0.149  0.88174
Religious.ImportanceSomewhat important    -0.084846   0.344392  -0.246  0.80540
Religious.ImportanceVery important         0.040328   0.366743   0.110  0.91244
Helpful.CommunityAgree                     0.142683   0.823359   0.173  0.86242
Helpful.CommunityDisagree                  0.677331   0.794198   0.853  0.39374
Helpful.CommunityNeutral                   0.089187   0.807354   0.110  0.91204
Helpful.CommunityStrongly agree           -0.254427   0.916498  -0.278  0.78131
Community.Shares.ValuesAgree              -0.014955   0.790263  -0.019  0.98490
Community.Shares.ValuesDisagree            0.169678   0.776478   0.219  0.82702
Community.Shares.ValuesNeutral             0.242902   0.776683   0.313  0.75448
Community.Shares.ValuesStrongly agree     -0.777929   0.938196  -0.829  0.40701
Community.TrustAgree                      -1.030154   0.574986  -1.792  0.07319
Community.TrustDisagree                   -1.071120   0.531727  -2.014  0.04397
Community.TrustNeutral                    -1.507299   0.550992  -2.736  0.00623
Community.TrustStrongly agree             -0.218153   0.728461  -0.299  0.76458
Close.knit.CommunityAgree                 -0.471825   0.514065  -0.918  0.35871
Close.knit.CommunityDisagree              -0.757531   0.506246  -1.496  0.13456
Close.knit.CommunityNeutral               -0.451102   0.490450  -0.920  0.35769
Close.knit.CommunityStrongly agree        -0.460142   0.622897  -0.739  0.46008
Feel.CloseSomewhat agree                  -1.016763   0.798333  -1.274  0.20280
Feel.CloseSomewhat disagree               -1.131442   0.807353  -1.401  0.16109
Feel.CloseStrongly agree                  -1.309587   0.821546  -1.594  0.11092
Similar.ValuesSomewhat agree               0.110678   0.698945   0.158  0.87418
Similar.ValuesSomewhat disagree           -0.125115   0.705137  -0.177  0.85917
Similar.ValuesStrongly agree               0.236325   0.718582   0.329  0.74225
TogethernessSomewhat agree                 1.529338   1.056668   1.447  0.14781
TogethernessSomewhat disagree              1.823154   1.050015   1.736  0.08251
TogethernessStrongly agree                 1.719991   1.065960   1.614  0.10662
GenderMale                                -0.184717   0.156435  -1.181  0.23769
Family.RespectSomewhat agree              -1.286228   0.740486  -1.737  0.08239
Family.RespectSomewhat disagree           -1.318913   0.755416  -1.746  0.08082
Family.RespectStrongly agree              -1.299311   0.760823  -1.708  0.08768
LoyaltySomewhat agree                      0.776312   1.058637   0.733  0.46337
LoyaltySomewhat disagree                   1.367005   1.051909   1.300  0.19376
LoyaltyStrongly agree                      0.656261   1.070174   0.613  0.53973
Income_medianAbove                         0.007527   0.162203   0.046  0.96299
Dental.InsuranceYes                        0.381994   0.169205   2.258  0.02397
                                             
(Intercept)                               ** 
EthnicityAsian Indian                        
EthnicityFilipino                         *  
EthnicityKorean                              
EthnicityOther                            .  
EthnicityVietnamese                       ***
Age                                       ***
ReligionBuddhist                             
ReligionCatholic                             
ReligionHindu                                
ReligionMuslim                            *  
ReligionOther                                
ReligionProtestant                           
Full.Time.EmploymentEmployed full time       
Religious.AttendanceNever                    
Religious.AttendanceOnce or twice a month    
Religious.AttendanceSeldom                   
English.DifficultiesMuch                     
English.DifficultiesNot much                 
English.DifficultiesVery much                
Get.AlongAgree                               
Get.AlongDisagree                            
Get.AlongNeutral                             
Get.AlongStrongly agree                      
Religious.ImportanceNot very important       
Religious.ImportanceSomewhat important       
Religious.ImportanceVery important           
Helpful.CommunityAgree                       
Helpful.CommunityDisagree                    
Helpful.CommunityNeutral                     
Helpful.CommunityStrongly agree              
Community.Shares.ValuesAgree                 
Community.Shares.ValuesDisagree              
Community.Shares.ValuesNeutral               
Community.Shares.ValuesStrongly agree        
Community.TrustAgree                      .  
Community.TrustDisagree                   *  
Community.TrustNeutral                    ** 
Community.TrustStrongly agree                
Close.knit.CommunityAgree                    
Close.knit.CommunityDisagree                 
Close.knit.CommunityNeutral                  
Close.knit.CommunityStrongly agree           
Feel.CloseSomewhat agree                     
Feel.CloseSomewhat disagree                  
Feel.CloseStrongly agree                     
Similar.ValuesSomewhat agree                 
Similar.ValuesSomewhat disagree              
Similar.ValuesStrongly agree                 
TogethernessSomewhat agree                   
TogethernessSomewhat disagree             .  
TogethernessStrongly agree                   
GenderMale                                   
Family.RespectSomewhat agree              .  
Family.RespectSomewhat disagree           .  
Family.RespectStrongly agree              .  
LoyaltySomewhat agree                        
LoyaltySomewhat disagree                     
LoyaltyStrongly agree                        
Income_medianAbove                           
Dental.InsuranceYes                       *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1505.5  on 1898  degrees of freedom
Residual deviance: 1330.0  on 1838  degrees of freedom
AIC: 1452

Number of Fisher Scoring iterations: 6
# Type II likelihood-ratio tests, one row per predictor term.
car::Anova(lr_mod, type = "II", test.statistic = "LR")
Analysis of Deviance Table (Type II tests)

Response: Folkmedicine
                        LR Chisq Df Pr(>Chisq)    
Ethnicity                 29.911  5  1.536e-05 ***
Age                       34.423  1  4.435e-09 ***
Religion                  13.192  6   0.040093 *  
Full.Time.Employment       2.463  1   0.116546    
Religious.Attendance       1.536  3   0.674059    
English.Difficulties       0.698  3   0.873790    
Get.Along                  8.026  4   0.090615 .  
Religious.Importance       0.404  3   0.939369    
Helpful.Community          4.465  4   0.346696    
Community.Shares.Values    4.195  4   0.380204    
Community.Trust           13.505  4   0.009056 ** 
Close.knit.Community       2.616  4   0.624005    
Feel.Close                 3.395  3   0.334665    
Similar.Values             1.150  3   0.764943    
Togetherness               4.097  3   0.251217    
Gender                     1.401  1   0.236517    
Family.Respect             3.029  3   0.387116    
Loyalty                    3.890  3   0.273603    
Income_median              0.002  1   0.962988    
Dental.Insurance           5.171  1   0.022963 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tabulate the coefficients on the exponentiated scale together with their
# confidence intervals, and render them as an interactive table.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
broom::tidy(lr_mod, exponentiate = TRUE, conf.int = TRUE) |>
  DT::datatable()